# Discrete Effects ####

# Author: Marco Portmann
# Please ask permission before using this script in your projects
# Last change: 15.05.2024


# Function can be provided with a list of probabilities.
# Unless the user supplies a specific formula for this probabilities,
# these probabilities are standard logit probabilities given by the formula exp(X)/(1+exp(X)).
# The user can specify a formula that is applied to these probabilities.



### Prepare Model ####


trim <- function (x) gsub("^\\s+|\\s+$", "", x)

SplitInteractions <- function(CoefText)
{
  return(unlist(lapply(strsplit(CoefText, '(\\*)|(:)'), trim)))
}



PrepareModel <- function(fit)
{ 
  ModelType <- 'unknown'

  if ('lm' %in% class(fit)) ModelType <- 'lm'
  if ('lrm' %in% class(fit)) ModelType <- 'lrm'
  if ('fixest' %in% class(fit))
  { 
    if (fit$method == "feols")
      ModelType <- 'fixest_ols'
    if (fit$method == "feglm")
    {
      if (fit$family$link == 'logit')
      {
        ModelType <- 'fixest_logit'
      }else(
        stop('Only logit models are supported for fixest.')
      )
    }  

  }
  

  
    
  if (ModelType %in% c("lm", "lrm") && sum(c('x', 'y') %in% names(fit)) != 2) stop('Please add "x=T, y=T" to the list of arguments of your regression function.')
  
  CoefMat <- data.frame(CoefName = names(coef(fit)), Coef = coef(fit), stringsAsFactors = F)
  CoefMat$OriginalName <- CoefMat$CoefName
  VarMat <- vcov(fit)
  
  if (grep("^fixest", ModelType) == 1)
  {
    XMat <- model.matrix(fit)
  }else{
    XMat <- as.data.frame(fit$x)   
  }  

  # Remove NAs
  
  if (length(DelCoefNames <- subset(CoefMat, is.na(Coef))$CoefName) > 0)
  {
    warning(paste('The following coefficients are removed: ', paste(DelCoefNames, collapse =  ', '), sep = ''))
    CoefMat <- subset(CoefMat, !is.na(Coef)) 
    XMat <- XMat[, !colnames(XMat) %in% DelCoefNames]
    VarMat <- VarMat[, !colnames(VarMat) %in% DelCoefNames]
    VarMat <- VarMat[!rownames(VarMat) %in% DelCoefNames, ]
    
  }

  # Rename intercept

  if (InterceptInBrakets <- length(InterceptPosition <- grep('^\\(Intercept\\)$', CoefMat$CoefName)) == 1)
  {
    CoefMat$CoefName[InterceptPosition] <- 'Intercept' 
    colnames(XMat)[colnames(XMat) == "(Intercept)"] <- 'Intercept'
  }else{InterceptPosition <- grep('^Intercept$', CoefMat$CoefName)}
  HasIntercept <- length(InterceptPosition) > 0

  # mp: deleted following line on 2024-04-15   check: can be deleted if all models list the Intercept in fit$x
  ##if (HasIntercept) XMat$Intercept <- 1 
  
  # Interaction terms
  
  IntChar <- '\\*'
  IntCharLeftSpace <- ' \\*'
  IntCharRightSpace <- '\\* '
  
  # list of interaction terms
  iterms <- grep('\\*', CoefMat$CoefName)
  if (length(iterms) == 0)
  {
    iterms <- grep(':', CoefMat$CoefName)
    IntChar <- ':'
    IntCharLeftSpace <- ':'
    IntCharRightSpace <- ':'
  } 

  if (length(InteractionTermPositions <-grep(IntChar,  CoefMat$CoefName))>0)
  {
    InteractionsDis <- lapply( setNames(CoefMat$CoefName[InteractionTermPositions], InteractionTermPositions), SplitInteractions)        
    InteractionsDis <- lapply(InteractionsDis,   function(x) which(CoefMat$CoefName %in% x))   
    Interactions <- data.frame(AtomarCoef = as.numeric(unlist(InteractionsDis)),  InteractionCoef = as.numeric(unlist(mapply(rep, x= names(InteractionsDis), each = lapply(InteractionsDis, length)))))
    CoefMat[InteractionTermPositions, 'CoefName'] <- paste('FE_Interactions_', 1:length(InteractionTermPositions), sep='')  
  }else Interactions <- NULL
  
  
  
  
  if (length(FixedEffects <- grep('(^factor\\()|(=)', CoefMat$CoefName))>0)
  {
    CoefMat[FixedEffects, 'CoefName'] <- paste('FE_SubstName_', 1:length(FixedEffects), sep='')  
  }
  
  
  if (length(Transformations <- grep('\\^', CoefMat$CoefName))>0)
  {
    warning('Polynomials are not yet supported by "DiscreteEffect". Please make sure that lower and upper bounds of polynomials are set consistently.')
    CoefMat[Transformations, 'CoefName'] <- paste('POL_SubstName_', 1:length(Transformations), sep='')  
  }
  
  tmpMap <- lapply(setNames(colnames(XMat), 1:ncol(XMat)),   function(x) which(CoefMat$OriginalName == x))
  tmpMap <- tmpMap[ unlist(lapply(tmpMap, length))>0]
  
  
  colnames(XMat)[as.numeric(names(tmpMap))] <- CoefMat[as.numeric(tmpMap), 'CoefName'] 
  
  tmpMap <- lapply(setNames(colnames(VarMat), 1:ncol(VarMat)),   function(x) which(CoefMat$OriginalName == x))
  tmpMap <- tmpMap[ unlist(lapply(tmpMap, length))>0]
  colnames(VarMat)[as.numeric(names(tmpMap))] <- CoefMat[as.numeric(tmpMap), 'CoefName'] 
  
  
  
  return(list(CoefMat = CoefMat, XMat = XMat, VarMat = VarMat, HasIntercept = HasIntercept, Interactions = Interactions, InteractionTermPositions = InteractionTermPositions, ModelType = ModelType))
}





# DiscreteEffect: main function #####

# Function has to be provided with a list of probabilities 'Probabilities = list( Prob1 = list (X1 = 1, X2 = ... ), Prob2 = list (X1 = 1, X2 = ... ), .... ) ).
# For each of the elements in Probabilities  a probability will be estimated. If the the user provides a logit model this will be of the form:
# Prob1Estim = exp(X)/(1+exp(X), where X = B1 * X1 + B2 * X2 ... 
# For linear models it is simply: Prob1Estim = X = B1 * X1 + B2 * X2 ...
# Additional coefficients for which no values are provided in Probabilities are computed according to FixedValues = list( Xn = 3, Xn+1 = function ) or
# by a default value/function given by FixedValuesDefault in case the user provides no further instruction in neither Probabilities nor FixedValues.
# Prob1Estim, Prob2Estim ... are linked by the LinkinFormula.
# The default value of LinkinFormula is  ~Prob1 - Prob2 


DiscreteEffect <- function(fit, Probabilities, LinkingFormula= ~Prob2 - Prob1, FixedValues = NULL, FixedValuesDefault=median, PrintDebug = F, ShowWarnings = F, Level = 0.95){
  require(car)
  
  EvalFixedValue <- function(evData, iCheckVar)
  {        
    iFName <- names(iCheckVar)
    iFSubst <- CoefMat[CoefMat$OriginalName == iFName, 'CoefName']
    indx <- which(iFSubst == names(evData))
    if (length(indx) != 1)
    {
      if (ShowWarnings) warning(paste("Variable '", iFName, "' is not used in the model.", sep = ''))         
      return(evData)
      break()
    }
    
    if (length(iCheckVar) !=1)  stop(paste("Fixed value for '", iFName, "' is misspecified: Only a single value or function is allowed.", sep = ''))
    
    iFValue <-  iCheckVar[[1]]
    
    if (is.function(iFValue))
      evData[indx] <-do.call(iFValue, list(XMat[,  iFSubst])) else
        evData[indx] <- iFValue
    return(evData)
  }
  
  
  # construct Logit formula
  LogitText <- function(variables){
    txt <- ""
    for (i in 1:(length(variables)-1)){
      txt <- paste(txt, names(variables)[i], "*", variables[i], "+", sep=" ")
    }
    txt <- paste(txt, names(variables)[length(variables)], "*", variables[length(variables)],  sep=" ")
    
    txt <- paste( "exp(", txt, ")/ (1+ exp(",txt, "))", sep="")
    return(txt)
  }
  
  # construct Linear formula
  LinearText <- function(variables){
    txt <- ""
    for (i in 1:(length(variables)-1)){
      txt <- paste(txt, names(variables)[i], "*", variables[i], "+", sep=" ")
    }
    txt <- paste(txt, names(variables)[length(variables)], "*", variables[length(variables)],  sep=" ")
    
    return(paste('(', txt, ')', sep =''))
  }
  
  
  # compute discrete change and variance for variable i
  
  ComputeEffect <- function(ProbX){
    #ProbX <-  Probabilities[1] 
    ProbName <- names(ProbX)

    ProbX <- ProbX[[1]]
    evalDataA <- evalData

    if (length(ProbX)>0)  for (i in 1:length(ProbX)) evalDataA <- EvalFixedValue(evalDataA, ProbX[i])
    
    for (it in InteractionTermPositions)
    {
      evalDataA[CoefMat[it, 'CoefName']] <- prod(evalDataA[CoefMat[subset(Interactions, InteractionCoef == it)$AtomarCoef, 'CoefName']])
    }
    
    
    FixedSummary <- as.character('')
    
    for (k in 1:length(evalDataA))
      FixedSummary <- paste(FixedSummary, '; ', CoefMat[CoefMat$CoefName == names(evalDataA)[k], 'OriginalName'], ': ', evalDataA[k], sep = '')             
    FixedSummary <- substr(FixedSummary, 3, nchar(FixedSummary))

    #print(names(evalDataA))
    if (ModelType %in% c('lrm', "fixest_logit")) return(list(LogitText = LogitText(evalDataA), Summary = FixedSummary))
    if (ModelType %in% c('lm', 'fixest_ols')) return(list(LogitText = LinearText(evalDataA), Summary = FixedSummary))
    
  }
  
  
  # -----------------------------------------------------------------------------
  # (function starts here)
  
  
  # To do:
  #      - What to do with fixed effects? Set all of them to zero???
  #      - Warn if the user sets the value of an interaction term 'Var1 * Var2 = ...' because such values will be ignored an overwritten
  #        by the product of the values of Var1 and Var2 

  PrepMod <- PrepareModel(fit)          
  attach(PrepMod)
  on.exit(detach(PrepMod))

  if (!ModelType %in% c('lm', 'lrm', 'fixest_ols', "fixest_logit")) stop('DiscreteEffect supports logit and linear models only.')
  
  
  
  if (is.function(FixedValuesDefault))
    evalData <- apply(XMat, 2, function(x)do.call(FixedValuesDefault, list(unlist(x)))) else
    {
      evalData <-rep(FixedValuesDefault, ncol(XMat))
      names(evalData) <- colnames(XMat)
    }
  
  
  #change evalData as given in FixedValues
  if (length(FixedValues)>0)  for (i in 1:length(FixedValues)) evalData <- EvalFixedValue(evalData, FixedValues[i])
  
  
  #it <- InteractionTermPositions[i] 
  # correct fixed values for interaction terms
  for (it in InteractionTermPositions)
  {
    evalData[CoefMat[it, 'CoefName']] <- prod(evalData[CoefMat[subset(Interactions, InteractionCoef == it)$AtomarCoef, 'CoefName']])
  }
  
  
  #    LinkingFormula <- ~ Prob2 +  Prob2
  #    Probabilities <- list(list(pgNonEcoVotesYes= 0.5, pgEcoVotesYes= 0.5), list(pgNonEcoVotesYes= 0.8))
  #    Probabilities <- list(test=list(~sometext*0.5), list(~sometext*0.5), test2=list(~sometext*0.5))    

  if (is.null(names(Probabilities))) names(Probabilities) <- paste('Prob', 1:length(Probabilities), sep='')
  if (length(which( names(Probabilities) == '')) >0) stop('Please provide either names for all probabilities or for none. If no names are provided, probabilites are automatically named Prob1, ... Probn.')
  
  AllVars <-all.vars(LinkingFormula)
  if (mean(AllVars %in% names(Probabilities)==F)>0) stop("Some probabilities used in 'LinkingFormula' are not defined in 'Probabilities'.")
  

  FormulaRight <- as.character(LinkingFormula)[2] 
  ProbLog <- list()
  for (i in 1:length(Probabilities))
  {
    #ProbX <- Probabilities[i]
    ProbLog <-  append( ProbLog, list(ComputeEffect(Probabilities[i])), length(ProbLog))
    ProbReg <- gsub('\\.', '\\\\.', names(Probabilities)[i])
    ProbLog[[i]]$Found <- gregexpr(paste('\\b', ProbReg, '\\b', sep = ''), FormulaRight)
  }
  
  DeltaForm <- FormulaRight
  Offset <- 0
  for (i in 1:nchar(FormulaRight))
  {
    for (j in 1:length(Probabilities))
      if (i %in% as.numeric(ProbLog[[j]]$Found[[1]]))
      {                 
        #        print('-------')
        #        print(i)
        #        print(Offset)
        #        print(j)
        s <- which(i == ProbLog[[j]]$Found[[1]] )
        F1 <- i + Offset            
        attr(F1, 'match.length') <-  attr(ProbLog[[j]]$Found[[1]],'match.length')[s]
        attr(F1, 'useBytes') <-  attr(ProbLog[[j]]$Found[[1]],"useBytes")          
        RepLength <- attr(ProbLog[[j]]$Found[[1]],'match.length')[s]
        
        regmatches(DeltaForm,   F1) <-  ProbLog[[j]]$LogitText
        Offset <- Offset + nchar(ProbLog[[j]]$LogitText) - RepLength
        
      }
  }

  tableDE <- deltaMethod( setNames(CoefMat$Coef, CoefMat$CoefName), vcov.=VarMat, g=DeltaForm, level = Level)  
  rownames(tableDE) <- "Discrete Change"
  tableDE <- tableDE[, c('Estimate', 'SE')]
  pvalue <- 2*(1-pnorm(abs(data.matrix(tableDE['Estimate']/tableDE['SE']))))
  colnames(pvalue)[1] <- 'pvalue'
  tableDE <- cbind(tableDE, pvalue)
  
  
  if (PrintDebug)
    for (i in 1:length(Probabilities)) 
    {
      tableDE <- cbind(tableDE, ProbLog[[i]]$Summary)        
      colnames(tableDE)[ncol(tableDE)] <- names(Probabilities)[i]
    }
  
  class(tableDE) <- c(class(tableDE), "DiscreteEffects")
  return(tableDE)
}


# evalspecial, set interactions manually ####


DiscreteEffectIntA <- function(fit, evalspecial, medianspecial=NA){
  
  require(car)
  
  # check variable name: Intercept and "=" not allowed
  IsProperVariable <- function(i){
    if( names(fit$coef)[i]!='Intercept' && regexpr("=", names(fit$coef)[i])==-1 ){
      return(T)
    } else return(F)
  }
  
  
  # check whether variable with name vname has to be evaluated at
  # default value (=dval) or lower/upper exception value (=1, 2)
  
  evalexception <- function(vname, dval, lowup){
    if (length(evalspecial)>0  ){
      indx<- grep(vname, evalspecial[seq(from=1, to=length(evalspecial), by=3)])[1]
      if (length(indx)>0 & is.na(indx)==F){
        if (is.na( as.numeric(evalspecial[1+(indx-1)*3+lowup]))==F){
          return(as.numeric(evalspecial[1+(indx-1)*3+lowup]))
        }else{ stop("check evalspecial arguments")
          return(dval)
        }
      }else{return(dval)}
    }else{return(dval)}
  }
  
  
  
  
  
  # construct Logit formula
  LogitText <- function(variables){
    txt <- ""
    for (i in 1:(length(variables)-1)){
      txt <- paste(txt, names(variables)[i], "*", variables[i], "+", sep=" ")
    }
    txt <- paste(txt, names(variables)[length(variables)], "*", variables[length(variables)],  sep=" ")
    
    txt <- paste( "exp(", txt, ")/ (1+ exp(",txt, "))", sep="")
    return(txt)
  }
  
  SetChangeVariables <- function(evalData, evalNames, evalValues, Quartile){
    
    evalValues <- as.numeric(evalValues) 
    
    # values given by evalValues
    if (length(evalNames[is.na(evalValues)==F])>0)
      evalData[evalNames[is.na(evalValues)==F]] <- evalValues[is.na(evalValues)==F]
    
    # if evalValues is NA use a quartile (for one value)
    if (length(evalNames[is.na(evalValues)])==1)
      evalData[evalNames[is.na(evalValues)]] <- quantile(fit$x[,evalNames[is.na(evalValues)]  ], 0.25*Quartile)
    
    # if evalValues is NA use a quartile (for vectors)
    if (length(evalNames[is.na(evalValues)])>1)
      evalData[evalNames[is.na(evalValues)]] <- apply(fit$x[,evalNames[is.na(evalValues)]  ], 2, quantile, 0.25*Quartile)
    
    
    # correct medians of interaction terms
    for (i in 1:length(iterms)){
      tmp <- items[items[,2]==iterms[i],1]
      if (length(tmp)>0)
        evalData[iterms[i]] <- prod(evalData[tmp])
    }
    
    return(evalData) 
  }
  
  # -----------------------------------------------------------------------------
  # (function starts here)
  
  RobVar <- fit$var
  if (is.null(RobVar)) RobVar <- vcov(fit)
  
  ShiftIntercept <- 0
  if (   length(grep("^Intercept$",  names(fit$coef)))>0 | length(grep("^\\(Intercept\\)$", names(fit$coef)))>0 ) ShiftIntercept <- 1
  
  #    if (ShiftIntercept == 0) warning("No intercept found.")
  replist <- data.frame(orig = as.character(), repl = as.character())
  
  
  slist <- c(evalspecial[seq(from=1, to=length(evalspecial), by=3)], medianspecial[seq(from=1, to=length(medianspecial), by=2)])
  
  slist <- slist[grep("\\*", slist)]
  slist <- slist[is.na(slist) == F]
  if (length(slist) > 0){
    replist <- data.frame(orig = slist, repl = paste("RrRVarXx",1:length(slist), sep = "_" ) )
    
    for (i in 1:length(replist)){      
      sltext <- paste("^", gsub("\\*", "\\\\*", replist[i, "orig"]), "$" , sep = "" )
      evalspecial <- gsub(sltext, replist[i, "repl"], evalspecial)
      medianspecial <- gsub(sltext, replist[i, "repl"], medianspecial)       
      names(fit$coef) <- gsub(sltext, replist[i, "repl"], names(fit$coef))
      colnames(RobVar) <- gsub(sltext, replist[i, "repl"], colnames(RobVar))
      rownames(RobVar) <- gsub(sltext, replist[i, "repl"], rownames(RobVar))
      names(fit$coefficients) <- gsub(sltext, replist[i, "repl"], names(fit$coefficients))
    }
  }
  
  # list of interaction terms
  iterms <- grep('\\*', names(fit$coef))
  # list all variables which appear in an interaction    
  items <- matrix(0, nrow=0, ncol=2)
  
  # list all variables which appear in an interaction
  for(i in 1:length(coef(fit))){
    itemsinteractions <- unique(c( grep(paste(names(fit$coef)[i], '\\*', sep=' '), names(fit$coef)), grep(paste('\\*', names(fit$coef)[i] , sep=' '), names(fit$coef)) ))
    if (length(itemsinteractions)>0){
      items <-  rbind(items,
                      cbind(i, (itemsinteractions)) )}
  }
  
  # 1. Use median and medianspecial data to set values of evalData.
  #    Copy evalData to evalData1and evalData2. Overwrite all changed
  #    variables. 
  
  # vector with medians (interaction-variables are corrected later)
  if (ShiftIntercept>0){
    evalData <- apply(cbind(Intercept=rep(1,dim(fit$x)[1]),
                            fit$x), 2, median)
  } else{
    evalData <- apply(fit$x, 2, median)    
    
  }
  
  
  
  #change evalData as given in medianspecial
  if (length(medianspecial)>1){
    evalData[medianspecial[seq(from=1, to=length(medianspecial), by=2)]] <- as.numeric(medianspecial[seq(from=2, to=length(medianspecial), by=2)])
  }
  
  
  
  # 2. Set lower and upper values for change variables and fix interaction values.
  evalData1 <- SetChangeVariables(evalData, 
                                  evalspecial[seq(from=1, to=length(evalspecial), by=3)],
                                  evalspecial[seq(from=2, to=length(evalspecial), by=3)],
                                  1)
  
  evalData2 <- SetChangeVariables(evalData, 
                                  evalspecial[seq(from=1, to=length(evalspecial), by=3)],
                                  evalspecial[seq(from=3, to=length(evalspecial), by=3)],
                                  3)
  
  # replace interaction terms' names
  for (k in iterms){
    
    tmpl <- items[ items[,2]==k,1]
    tstr <- ''
    for (j in 1:length(tmpl)){
      tstr <- paste(tstr, tmpl[[j]], sep='x')
    }
    colnames(RobVar)[k] <- tstr
    rownames(RobVar)[k] <- tstr
    names(fit$coefficients)[k] <-tstr
    names(evalData1)[k] <- tstr
    names(evalData2)[k] <- tstr
  }
  
  

  texE <- paste(LogitText(evalData2), "-", LogitText(evalData1) ,sep="")
  tableDE <- deltaMethod(coef(fit), vcov.=RobVar, g=texE)
  rownames(tableDE) <- "Discrete Change"
  
  
  pvalue <- 2*(1-pnorm(abs(data.matrix(tableDE['Estimate']/tableDE['SE']))))
  colnames(pvalue)[1] <- 'pvalue'
  tableDE <- cbind(tableDE, pvalue)
  class(tableDE) <- c(class(tableDE), "DiscreteEffects")
  return(tableDE)
}

